library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

use work.core_pkg.all;
use work.mem_pkg.all;
use work.op_pkg.all;

-- MEM pipeline stage.
-- Latches the values coming from EXEC into an internal pipeline register,
-- drives the memory unit (work.memu) from that register, derives the branch
-- decision (pcsrc) and provides forwarding information back to EXEC.
entity mem is
	port (
		clk           : in  std_logic; -- clock; the pipeline register updates on the rising edge
		res_n         : in  std_logic; -- active-low asynchronous reset of the pipeline register
		stall         : in  std_logic; -- '1': keep the latched instruction, clear its memread/memwrite strobes
		flush         : in  std_logic; -- '1': wbop_out is replaced by WB_NOP

		-- to Ctrl
		mem_busy      : out std_logic; -- busy flag, driven by port B of the memory unit

		-- from EXEC
		mem_op        : in  mem_op_type; -- memory / branch operation for this stage
		wbop_in       : in  wb_op_type;  -- write-back operation, handed on to WB
		pc_new_in     : in  pc_type;     -- handed on to FETCH as pc_new_out
		pc_old_in     : in  pc_type;     -- handed on to WB as pc_old_out
		aluresult_in  : in  data_type;   -- fed to port A of the memory unit and handed on to WB
		wrdata        : in  data_type;   -- fed to port W of the memory unit
		zero          : in  std_logic;   -- evaluated for the BR_CND / BR_CNDI branch decision

		-- to EXEC (forwarding)
		reg_write     : out reg_write_type; -- write enable, target register and data of the latched instruction

		-- to FETCH
		pc_new_out    : out pc_type;   -- registered copy of pc_new_in
		pcsrc         : out std_logic; -- '1' when the latched branch operation is taken

		-- to WB
		wbop_out      : out wb_op_type; -- registered wbop_in, or WB_NOP while flush = '1'
		pc_old_out    : out pc_type;    -- registered copy of pc_old_in
		aluresult_out : out data_type;  -- registered copy of aluresult_in
		memresult     : out data_type;  -- result port R of the memory unit (not registered here)

		-- memory controller interface
		mem_out       : out mem_out_type; -- driven by port M of the memory unit
		mem_in        : in  mem_in_type;  -- fed to port D of the memory unit

		-- exceptions
		exc_load      : out std_logic; -- driven by port XL of the memory unit
		exc_store     : out std_logic  -- driven by port XS of the memory unit
	);
end entity;

architecture rtl of mem is
	-- Pipeline register of the MEM stage: one field per value latched from
	-- EXEC, plus a registered copy of the memory unit's result.
	type internal_t is record
		memop     : mem_op_type;
		wbop      : wb_op_type;
		pc_new    : pc_type;
		pc_old    : pc_type;
		aluresult : data_type;
		wrdata    : data_type;
		zero      : std_logic;
		-- Registered copy of memresult. Forwarding uses this copy instead of
		-- the combinational memory-unit output to shorten the critical path
		-- (see the sync process).
		memresult : mem_data_type;
	end record;

	signal internal : internal_t;

	-- Reset value of the pipeline register: no memory operation, no
	-- write-back, all data fields zero. Named association keeps the
	-- aggregate correct even if the record fields are reordered.
	constant INITIAL_INTERNAL : internal_t := (
		memop     => MEM_NOP,
		wbop      => WB_NOP,
		pc_new    => (others => '0'),
		pc_old    => (others => '0'),
		aluresult => (others => '0'),
		wrdata    => (others => '0'),
		zero      => '0',
		memresult => (others => '0')
	);
begin

	-- structural
	-- The memory unit works on the registered operation, address and write
	-- data; its result, busy flag and exception flags go straight to the ports.
	memu_inst : entity work.memu
	port map (
		op => internal.memop.mem,
		A  => internal.aluresult,
		W  => internal.wrdata,
		R  => memresult,
		B  => mem_busy,
		XL => exc_load,
		XS => exc_store,
		D  => mem_in,
		M  => mem_out
	);

	-- concurrent
	pc_new_out <= internal.pc_new;

	-- Branch decision: unconditional branch, branch on zero = '1' (BR_CND)
	-- or branch on zero = '0' (BR_CNDI).
	pcsrc <= '1' when
		(internal.memop.branch = BR_BR) or
		(internal.memop.branch = BR_CND and internal.zero = '1') or
		(internal.memop.branch = BR_CNDI and internal.zero = '0')
		else '0';

	aluresult_out <= internal.aluresult;
	pc_old_out <= internal.pc_old;

	-- A flush replaces the write-back operation by a NOP.
	-- NOTE(review): only wbop_out is gated by flush; reg_write and pcsrc
	-- below/above are not. Confirm that this is intended.
	wbop_out <= internal.wbop when flush = '0' else WB_NOP;

	-- fwd cntrl
	-- Forwarding information for EXEC, taken from the latched write-back
	-- operation. The forwarded data follows the write-back source selection.
	reg_write.write <= internal.wbop.write;
	reg_write.reg <= internal.wbop.rd;
	with internal.wbop.src select reg_write.data <=
		internal.aluresult when WBS_ALU,
		internal.memresult when WBS_MEM,
		-- pc_old + 4; the leading 32-bit zero literal widens the sum to 32 bits
		std_logic_vector(x"00000000" + unsigned(internal.pc_old) + x"4") when WBS_OPC;

	-- synchronous
	-- Pipeline register with asynchronous active-low reset. A single clock
	-- edge branch with the stall decision nested inside is the standard
	-- template for a clocked process; stall is therefore not part of the
	-- sensitivity list.
	sync : process(clk, res_n)
	begin
		if res_n = '0' then
			internal <= INITIAL_INTERNAL;
		elsif rising_edge(clk) then
			if stall = '0' then
				-- Not stalled: take over the next instruction from EXEC.
				internal.memop     <= mem_op;
				internal.wbop      <= wbop_in;
				internal.pc_new    <= pc_new_in;
				internal.pc_old    <= pc_old_in;
				internal.aluresult <= aluresult_in;
				internal.wrdata    <= wrdata;
				internal.zero      <= zero;
				-- memresult is registered on every clock edge. The registered
				-- copy is used for forwarding; this is no problem because on a
				-- load forward the pipeline is stalled for a cycle.
				internal.memresult <= memresult;
			elsif stall = '1' then
				-- Stalled: keep the latched instruction but clear its
				-- read/write strobes, and keep tracking the memory result.
				internal.memop.mem.memread  <= '0';
				internal.memop.mem.memwrite <= '0';
				internal.memresult <= memresult;
			end if;
		end if;
	end process;

end architecture;
